from Utils.Function import *
from Utils.RedisHelper import RedisHelper
import time

class Data5uSpider(object):
    """Crawler for the data5u.com free proxy list pages.

    ``config`` lists the pages to fetch together with the XPath expressions
    that are handed to ``GetDataByXpath`` to extract protocol, IP and port.
    """

    # Only 'name' and 'urls' are read directly by this class; the whole dict
    # is passed to GetDataByXpath. 'keyword', 'interval', 'type' and 'enable'
    # are presumably consumed by helpers or a scheduler elsewhere -- TODO confirm.
    config = {
        'name': 'data5u',
        'urls': [
            'http://www.data5u.com/free/gngn/index.shtml',
            'http://www.data5u.com/free/gnpt/index.shtml',
            'http://www.data5u.com/free/gwgn/index.shtml',
            'http://www.data5u.com/free/gwpt/index.shtml',
        ],
        'keyword': '</html>',
        'interval': 60,
        'protocol': '//div[@class="wlist"]//ul[@class="l2"]/span[4]/li/a/text()',
        'ip': '//div[@class="wlist"]//ul[@class="l2"]/span[1]/li/text()',
        'port': '//div[@class="wlist"]//ul[@class="l2"]/span[2]/li/text()',
        'type': 'xpath',
        'enable': 1,
    }

    def getlist(self):
        """Fetch every configured page and store the proxies found on it.

        Each record produced by ``GetDataByXpath`` is written through
        ``RedisHelper.temp_set`` keyed by its IP, with the protocol
        lower-cased. Pages for which ``GetHtml`` yields an empty result are
        skipped (no progress line, no pause). Progress is printed to stdout;
        the method returns nothing.
        """
        config = self.config
        name = config['name']
        time_format = '%Y-%m-%d %H:%M:%S'

        print(time.strftime(time_format, time.localtime()) + ':抓取 ' + name + ' 开始')

        count = 0
        # Created on first use and then shared by all pages, so a run that
        # fetches nothing never touches RedisHelper at all.
        redis = None
        for url in config['urls']:
            html = GetHtml(url)
            # Truthiness also covers None, not only the empty string.
            if not html:
                continue
            if redis is None:
                redis = RedisHelper()
            for item in GetDataByXpath(html, config):
                ip = item['ip']
                record = {
                    'ip': ip,
                    'protocol': str(item['protocol']).lower(),
                    'port': item['port'],
                }
                redis.temp_set(ip, record)
                count += 1
            print('正在抓取 ' + name + ',当前抓取到 ' + str(count) + " 条记录")
            # Pause between successfully fetched pages.
            time.sleep(2)

        print(time.strftime(time_format, time.localtime()) + ':抓取 ' + name
              + ' 结束,共抓取到 ' + str(count) + " 条记录")


if __name__ == '__main__':
    # Run a single crawl when this file is executed directly as a script.
    Data5uSpider().getlist()
